import numpy as np
import paddle
import paddle.nn.functional as F
try:
    import paddle._legacy_C_ops as C_ops
except:
    import paddle._C_ops as C_ops

from .nms import filter_bbox, nms_bboxes, sigmoid, softmax, pd_multiclass_nms, pd_matrix_nms

def distance2bbox(points, dist):
    """Convert per-point (l, t, r, b) distances into x1y1x2y2 boxes.

    `points` holds anchor-point coordinates and `dist` the predicted
    distances to the four box sides; broadcasting rules apply.
    """
    top_left = points - dist[..., :2]
    bottom_right = points + dist[..., 2:]
    return np.concatenate((top_left, bottom_right), axis=-1)


def get_yolov6_pd_bboxes(outputs, batch_size, params, contents):
    """Decode YOLOv6 distance predictions into boxes and run multiclass NMS.

    Box coordinates in the returned arrays are divided by the network
    input width/height. NOTE(review): `outputs` is not indexed by the loop
    variable, so this presumably assumes batch_size == 1 — confirm upstream.
    """
    _, height, width = params["input_size"].split(",")
    norm = (1., 1., int(width), int(height), int(width), int(height))

    results = []
    for _ in range(batch_size):
        pred_scores, pred_dist, anchor_points, stride_tensor = outputs
        boxes = distance2bbox(anchor_points[None], pred_dist.transpose([0, 2, 1]))
        boxes = boxes * stride_tensor

        dets = pd_multiclass_nms(paddle.to_tensor(boxes),
                                 paddle.to_tensor(pred_scores),
                                 0.03, 0.65, 300, -1, 3000)
        results.append(dets.numpy() / norm)
    return results

def get_yolov6_pd_bboxes_v1(outputs, batch_size, params, contents):
    """Run multiclass NMS on already-decoded YOLOv6 boxes and scores.

    Returned box coordinates are normalized by the network input size.
    NOTE(review): `outputs` is not indexed per image — presumably
    batch_size == 1; confirm upstream.
    """
    _, height, width = params["input_size"].split(",")
    norm = (1., 1., int(width), int(height), int(width), int(height))

    results = []
    for _ in range(batch_size):
        pred_scores, pred_bboxes = outputs
        dets = pd_multiclass_nms(paddle.to_tensor(pred_bboxes),
                                 paddle.to_tensor(pred_scores),
                                 0.03, 0.65, 300, -1, 3000)
        results.append(dets.numpy() / norm)
    return results


def get_ppyolo_pd_bboxes(outputs, batch_size, params, contents):
    """Decode PP-YOLO head outputs with paddle's yolo_box and run Matrix NMS.

    Args:
        outputs: list of per-level raw head tensors (numpy arrays).
        batch_size: number of images. NOTE(review): `outputs` is not indexed
            by `idx`, so every image is decoded from the same head outputs —
            presumably batch_size == 1; confirm upstream.
        params: model config; reads 'anchors' (comma-separated floats, six
            values i.e. three (w, h) pairs per level) and 'scale_x_y'.
        contents: per-image metadata; content[2]/content[3] are used below as
            the original image width/height — verify against the caller.

    Returns:
        list of per-image prediction arrays from pd_matrix_nms, with the box
        columns (2:6) normalized by the original image size.
    """
    # outputs.reverse()
    # channel, height, width = params["input_size"].split(",")

    # Three (w, h) anchor pairs per detection level: shape (num_levels, 6).
    anchors_list = np.array([float(anch) for anch in params['anchors'].split(",")]).reshape(-1,6)

    npreds = []
    for idx in range(batch_size):
        yolo_boxes, yolo_scores = [], [] 
        content = contents[idx]
        for i in range(len(outputs)):
            head_out = paddle.to_tensor(outputs[i])
            # ori_shape = paddle.to_tensor([[int(height), int(weight)]], dtype="int32")
            # Original image size as (h, w).
            ori_shape = paddle.to_tensor([[int(content[3]), int(content[2])]], dtype="int32")
            boxes, scores = paddle.vision.ops.yolo_box(
                head_out,
                ori_shape,
                list(anchors_list[i]),
                80,                 # class_num (COCO)
                0.01,               # confidence threshold
                32 // 2**i,         # downsample ratio halves per level: 32, 16, 8
                True,               # clip boxes to the image boundary
                scale_x_y=float(params["scale_x_y"])
            )
            # boxes /= paddle.to_tensor([float(width), float(height),float(width), float(height)])
            yolo_boxes.append(boxes)
            # Transpose scores so classes come before boxes, as the NMS expects.
            yolo_scores.append(paddle.transpose(scores,perm=[0,2,1]))

        yolo_boxes = paddle.concat(yolo_boxes, axis=1)
        yolo_scores = paddle.concat(yolo_scores, axis=2)

        preds = pd_matrix_nms(yolo_boxes, yolo_scores, 0.01, 0.01, 100, -1, -1)
        # preds = pd_multiclass_nms(yolo_boxes, yolo_scores, 0.005, 0.45, 100)
        if preds.size != 0:
            # Normalize the xyxy box columns by the original image size.
            preds[...,2:6] /= paddle.to_tensor([float(content[2]), float(content[3]),float(content[2]), float(content[3])])
        npreds.append(preds.numpy())

    return npreds


def get_yolov3_pd_bboxes(outputs, batch_size, params, contents):
    """Decode YOLOv3 head outputs with paddle's yolo_box and run NMS.

    Boxes (columns 2:6 of the NMS output) are normalized by
    content[2]/content[3], read as the original image width/height.
    NOTE(review): `outputs` is the same for every `idx` — presumably
    batch_size == 1; confirm upstream.
    """
    anchor_sets = np.array(
        [float(a) for a in params['anchors'].split(",")]).reshape(-1, 6)
    scale_xy = float(params["scale_x_y"])

    npreds = []
    for idx in range(batch_size):
        content = contents[idx]
        # Original image size as (h, w); identical for every level.
        ori_shape = paddle.to_tensor([[int(content[3]), int(content[2])]], dtype="int32")

        level_boxes, level_scores = [], []
        for lvl, head in enumerate(outputs):
            boxes, scores = paddle.vision.ops.yolo_box(
                paddle.to_tensor(head),
                ori_shape,
                list(anchor_sets[lvl]),
                80,
                0.005,
                32 // 2**lvl,
                True,
                scale_x_y=scale_xy
            )
            level_boxes.append(boxes)
            # Classes-first layout for the NMS call below.
            level_scores.append(paddle.transpose(scores, perm=[0, 2, 1]))

        all_boxes = paddle.concat(level_boxes, axis=1)
        all_scores = paddle.concat(level_scores, axis=2)

        preds = pd_multiclass_nms(all_boxes, all_scores, 0.005, 0.45, 100)
        if preds.size != 0:
            preds[..., 2:6] /= paddle.to_tensor(
                [float(content[2]), float(content[3]), float(content[2]), float(content[3])])
        npreds.append(preds.numpy())

    return npreds

def get_ppyoloe_pd_bboxes(outputs, batch_size, params, contents):
    """Decode PP-YOLOE distance predictions into normalized boxes plus NMS.

    NOTE(review): `outputs` is not indexed per image — presumably
    batch_size == 1; confirm upstream.
    """
    _, height, width = params["input_size"].split(",")
    wh_scale = (int(width), int(height), int(width), int(height))

    results = []
    for _ in range(batch_size):
        pred_scores, pred_dist, anchor_points, stride_tensor = outputs
        boxes = distance2bbox(anchor_points[None], pred_dist)
        boxes = boxes * stride_tensor
        # Normalize by the network input size before NMS.
        boxes = boxes / wh_scale

        dets = pd_multiclass_nms(paddle.to_tensor(boxes),
                                 paddle.to_tensor(pred_scores),
                                 0.01, 0.7, 300)
        results.append(dets.numpy())

    return results

def get_pico_pd_bboxes(outputs, batch_size, params, contents):
    """Normalize PicoDet boxes by the network input size and run NMS.

    NOTE(review): `outputs` is the same for every image — presumably
    batch_size == 1; confirm upstream.
    """
    _, height, width = params["input_size"].split(",")
    scale = paddle.to_tensor([float(width), float(height), float(width), float(height)])

    results = []
    for _ in range(batch_size):
        boxes = paddle.to_tensor(outputs[0]) / scale
        scores = paddle.to_tensor(outputs[1])
        dets = pd_multiclass_nms(boxes, scores, 0.025, 0.6, 100)
        results.append(dets.numpy())

    return results

class FCOSBox(object):
    """Decode FCOS head outputs (class logits, box regression, centerness)
    into boxes and centerness-weighted classification scores."""

    __shared__ = ['num_classes']

    def __init__(self, num_classes=80):
        # num_classes: number of object categories (COCO default of 80).
        super(FCOSBox, self).__init__()
        self.num_classes = num_classes

    def _merge_hw(self, inputs, ch_type="channel_first"):
        """Build a dynamic 3-D shape tensor that flattens H*W of an NCHW
        tensor: [bs, ch, h*w] for "channel_first" or [bs, h*w, ch] for
        "channel_last". Raises KeyError for any other ch_type."""
        shape_ = paddle.shape(inputs)
        bs, ch, hi, wi = shape_[0], shape_[1], shape_[2], shape_[3]
        img_size = hi * wi
        img_size.stop_gradient = True
        if ch_type == "channel_first":
            new_shape = paddle.concat([bs, ch, img_size])
        elif ch_type == "channel_last":
            new_shape = paddle.concat([bs, img_size, ch])
        else:
            raise KeyError("Wrong ch_type %s" % ch_type)
        new_shape.stop_gradient = True
        return new_shape

    def _postprocessing_by_level(self, locations, box_cls, box_reg, box_ctn,
                                 scale_factor):
        """Decode a single FPN level.

        Args:
            locations: anchor-point coordinates; column 0 is x, column 1 y.
            box_cls: raw class logits, NCHW.
            box_reg: raw (l, t, r, b) distance regression, NCHW.
            box_ctn: raw centerness logits, NCHW.
            scale_factor: per-image scale used to map boxes back to the
                original image.

        Returns:
            (scores, boxes): centerness-weighted class scores and decoded
            xyxy boxes for this level.
        """
        act_shape_cls = self._merge_hw(box_cls)
        box_cls_ch_last = paddle.reshape(x=box_cls, shape=act_shape_cls)
        box_cls_ch_last = F.sigmoid(box_cls_ch_last)

        act_shape_reg = self._merge_hw(box_reg)
        box_reg_ch_last = paddle.reshape(x=box_reg, shape=act_shape_reg)
        box_reg_ch_last = paddle.transpose(box_reg_ch_last, perm=[0, 2, 1])
        # (l, t, r, b) distances around each location -> (x1, y1, x2, y2).
        box_reg_decoding = paddle.stack(
            [
                locations[:, 0] - box_reg_ch_last[:, :, 0],
                locations[:, 1] - box_reg_ch_last[:, :, 1],
                locations[:, 0] + box_reg_ch_last[:, :, 2],
                locations[:, 1] + box_reg_ch_last[:, :, 3]
            ],
            axis=1)
        box_reg_decoding = paddle.transpose(box_reg_decoding, perm=[0, 2, 1])

        act_shape_ctn = self._merge_hw(box_ctn)
        box_ctn_ch_last = paddle.reshape(x=box_ctn, shape=act_shape_ctn)
        box_ctn_ch_last = F.sigmoid(box_ctn_ch_last)

        # recover the location to original image
        im_scale = paddle.concat([scale_factor, scale_factor], axis=1)
        im_scale = paddle.expand(im_scale, [box_reg_decoding.shape[0], 4])
        im_scale = paddle.reshape(im_scale, [box_reg_decoding.shape[0], -1, 4])
        box_reg_decoding = box_reg_decoding / im_scale
        # Weight class scores by centerness to down-rank off-center boxes.
        box_cls_ch_last = box_cls_ch_last * box_ctn_ch_last
        return box_cls_ch_last, box_reg_decoding

    def __call__(self, locations, cls_logits, bboxes_reg, centerness,
                 scale_factor):
        """Decode all FPN levels and concatenate the results.

        Returns:
            (pred_boxes, pred_scores): boxes concatenated along axis 1,
            scores along axis 2 (classes-first layout).
        """
        pred_boxes_ = []
        pred_scores_ = []
        for pts, cls, box, ctn in zip(locations, cls_logits, bboxes_reg,
                                      centerness):
            pred_scores_lvl, pred_boxes_lvl = self._postprocessing_by_level(
                pts, cls, box, ctn, scale_factor)
            pred_boxes_.append(pred_boxes_lvl)
            pred_scores_.append(pred_scores_lvl)
        pred_boxes = paddle.concat(pred_boxes_, axis=1)
        pred_scores = paddle.concat(pred_scores_, axis=2)
        return pred_boxes, pred_scores

def get_fcos_pd_bboxes(outputs, batch_size, params, contents):
    """Decode FCOS head outputs (5 FPN levels) into normalized boxes + NMS.

    `outputs` holds, in order: 5 location tensors, 5 classification maps,
    5 regression maps and 5 centerness maps. NOTE(review): the same
    `outputs` are reused for every image — presumably batch_size == 1.
    """
    _, height, width = params["input_size"].split(",")
    decoder = FCOSBox()
    norm = paddle.to_tensor([float(width), float(height), float(width), float(height)])

    results = []
    for _ in range(batch_size):
        tensors = [paddle.to_tensor(o) for o in outputs[:20]]
        locations = tensors[0:5]
        box_cls = tensors[5:10]
        box_reg = tensors[10:15]
        box_ctn = tensors[15:20]
        scale_factor = paddle.to_tensor([[1., 1.]])

        bboxes, scores = decoder(locations, box_cls, box_reg, box_ctn,
                                 scale_factor)
        bboxes /= norm

        dets = pd_multiclass_nms(bboxes, scores, 0.025, 0.6, 100, -1)
        results.append(dets.numpy())

    return results

def _simple_nms(heat, kernel=3):
    """Suppress non-peak heatmap responses: keep only values that survive a
    same-size max pooling, i.e. local maxima within a kernel x kernel window."""
    pooled = F.max_pool2d(heat, kernel, stride=1, padding=(kernel - 1) // 2)
    peak_mask = paddle.cast(pooled == heat, 'float32')
    return heat * peak_mask

def _topk(scores):
    """Select the top-100 scoring heatmap peaks across all classes.

    Args:
        scores: heatmap tensor of shape [1, C, H, W] (batch size must be 1,
            as noted below).

    Returns:
        (topk_score, topk_inds, topk_clses, topk_ys, topk_xs): top-k scores,
        their flat H*W indices, class ids (float32), and the row/column
        coordinates of each selected peak.
    """
    k = 100
    shape_fm = paddle.shape(scores)
    shape_fm.stop_gradient = True
    cat, height, width = shape_fm[1], shape_fm[2], shape_fm[3]
    # batch size is 1
    scores_r = paddle.reshape(scores, [cat, -1])
    # Stage 1: per-class top-k over the flattened spatial dimensions.
    topk_scores, topk_inds = paddle.topk(scores_r, k)
    # Flat index -> (row, col) on the feature map.
    topk_ys = topk_inds // width
    topk_xs = topk_inds % width

    # Stage 2: global top-k across the C*k per-class candidates.
    topk_score_r = paddle.reshape(topk_scores, [-1])
    topk_score, topk_ind = paddle.topk(topk_score_r, k)
    k_t = paddle.full(paddle.shape(topk_ind), k, dtype='int64')
    # Each class contributed exactly k candidates, so index // k is the class.
    topk_clses = paddle.cast(paddle.floor_divide(topk_ind, k_t), 'float32')

    # Gather flat indices and coordinates of the globally selected peaks.
    topk_inds = paddle.reshape(topk_inds, [-1])
    topk_ys = paddle.reshape(topk_ys, [-1, 1])
    topk_xs = paddle.reshape(topk_xs, [-1, 1])
    topk_inds = paddle.gather(topk_inds, topk_ind)
    topk_ys = paddle.gather(topk_ys, topk_ind)
    topk_xs = paddle.gather(topk_xs, topk_ind)

    return topk_score, topk_inds, topk_clses, topk_ys, topk_xs

def get_center_pd_bboxes(outputs, batch_size, params, contents):
    """Decode CenterNet-style outputs (heatmap, wh, offset) into boxes.

    Args:
        outputs: raw tensors; per the unpacking below, outputs[0] is the
            class heatmap, outputs[1] the offset regression and outputs[2]
            the width/height map.
        batch_size: number of images. NOTE(review): `outputs` is not indexed
            by `idx` — presumably batch_size == 1; confirm upstream.
        params: model config; only 'input_size' is used.
        contents: unused here.

    Returns:
        list of (k, 6) arrays: [class, score, x1, y1, x2, y2], with box
        coordinates normalized by the network input size.
    """
    channel, height, width = params["input_size"].split(",")

    npreds = []
    for idx in range(batch_size):
        hm, wh, reg = paddle.to_tensor(outputs[0]), paddle.to_tensor(outputs[2]),paddle.to_tensor(outputs[1])

        # Keep only local heatmap maxima, then take the top-100 peaks.
        heat = _simple_nms(hm)
        scores, inds, topk_clses, ys, xs = _topk(heat)
        scores = scores.unsqueeze(1)
        clses = topk_clses.unsqueeze(1)

        reg_t = paddle.transpose(reg, [0, 2, 3, 1])
        # Like TTFBox, batch size is 1.
        # TODO: support batch size > 1
        reg = paddle.reshape(reg_t, [-1, reg_t.shape[-1]])
        reg = paddle.gather(reg, inds)
        xs = paddle.cast(xs, 'float32')
        ys = paddle.cast(ys, 'float32')
        # Refine the integer peak coordinates with the predicted offsets.
        xs = xs + reg[:, 0:1]
        ys = ys + reg[:, 1:2]

        wh_t = paddle.transpose(wh, [0, 2, 3, 1])
        wh = paddle.reshape(wh_t, [-1, wh_t.shape[-1]])
        wh = paddle.gather(wh, inds)

        # Center +/- half the predicted width/height -> corner coordinates.
        x1 = xs - wh[:, 0:1] / 2
        y1 = ys - wh[:, 1:2] / 2
        x2 = xs + wh[:, 0:1] / 2
        y2 = ys + wh[:, 1:2] / 2

        # Scale feature-map units back to input pixels — presumably the
        # heatmap has stride 4; confirm against the model config.
        x1 = x1 * 4
        y1 = y1 * 4
        x2 = x2 * 4
        y2 = y2 * 4

        bboxes = paddle.concat([x1, y1, x2, y2], axis=1)
        bboxes /= paddle.to_tensor([float(width), float(height),float(width), float(height)])
        preds = paddle.concat([clses, scores, bboxes], axis=1)
        npreds.append(preds.numpy())        
        
    return npreds

def _to_list(l):
    """Wrap *l* in a list unless it is already a list or tuple."""
    return list(l) if isinstance(l, (list, tuple)) else [l]

class AnchorGeneratorSSD(object):
    """Generate SSD prior (default) boxes for a list of feature maps.

    When both ``min_sizes`` and ``max_sizes`` are empty, per-layer sizes
    are derived from ``min_ratio``/``max_ratio`` following the SSD paper
    recipe, with an extra smaller prior prepended for the first layer.
    """

    def __init__(self,
                 steps=[8, 16, 32, 64, 100, 300],
                 aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]],
                 min_ratio=15,
                 max_ratio=90,
                 base_size=300,
                 min_sizes=[30.0, 60.0, 111.0, 162.0, 213.0, 264.0],
                 max_sizes=[60.0, 111.0, 162.0, 213.0, 264.0, 315.0],
                 offset=0.5,
                 flip=True,
                 clip=False,
                 min_max_aspect_ratios_order=False):
        self.steps = steps
        self.aspect_ratios = aspect_ratios
        self.min_ratio = min_ratio
        self.max_ratio = max_ratio
        self.base_size = base_size
        # Copy so the size-derivation below never mutates the caller's
        # lists (or the shared mutable default arguments).
        self.min_sizes = list(min_sizes)
        self.max_sizes = list(max_sizes)
        self.offset = offset
        self.flip = flip
        self.clip = clip
        self.min_max_aspect_ratios_order = min_max_aspect_ratios_order

        if self.min_sizes == [] and self.max_sizes == []:
            # Derive per-layer sizes from the ratio range.
            # BUGFIX: the original called math.floor and six.moves.range,
            # but neither `math` nor `six` is imported in this module, so
            # this path raised NameError. Integer `//` and the builtin
            # range() produce identical results for these non-negative ints.
            num_layer = len(aspect_ratios)
            step = (self.max_ratio - self.min_ratio) // (num_layer - 2)
            for ratio in range(self.min_ratio, self.max_ratio + 1, step):
                self.min_sizes.append(self.base_size * ratio / 100.)
                self.max_sizes.append(self.base_size * (ratio + step) / 100.)
            # The first layer gets a special, smaller prior.
            self.min_sizes = [self.base_size * .10] + self.min_sizes
            self.max_sizes = [self.base_size * .20] + self.max_sizes

        # Number of priors per feature-map cell for each layer.
        self.num_priors = []
        for aspect_ratio, min_size, max_size in zip(
                aspect_ratios, self.min_sizes, self.max_sizes):
            if isinstance(min_size, (list, tuple)):
                self.num_priors.append(
                    len(self._as_list(min_size)) + len(self._as_list(max_size)))
            else:
                # Each aspect ratio contributes 2 boxes (flipped), plus the
                # 1:1 box, per min size; plus one box per max size.
                self.num_priors.append((len(aspect_ratio) * 2 + 1) * len(
                    self._as_list(min_size)) + len(self._as_list(max_size)))

    @staticmethod
    def _as_list(value):
        """Wrap *value* in a list unless it already is a list or tuple."""
        return list(value) if isinstance(value, (list, tuple)) else [value]

    def __call__(self, inputs, image):
        """Return one (-1, 4) prior-box tensor per input feature map."""
        boxes = []
        for input, min_size, max_size, aspect_ratio, step in zip(
                inputs, self.min_sizes, self.max_sizes, self.aspect_ratios,
                self.steps):
            box, _ = self.prior_box(
                input=input,
                image=image,
                min_sizes=self._as_list(min_size),
                max_sizes=self._as_list(max_size),
                aspect_ratios=aspect_ratio,
                steps=[step, step])
            boxes.append(paddle.reshape(box, [-1, 4]))
        return boxes

    def prior_box(self, input, image, min_sizes, max_sizes=None, aspect_ratios=[1.], variance=[0.1, 0.1, 0.2, 0.2], steps=[0.0, 0.0]):
        """Thin wrapper over the C prior_box op; returns (boxes, variances).

        `max_sizes` is only forwarded when non-empty.
        """
        attrs = ('min_sizes', min_sizes, 'aspect_ratios', aspect_ratios,
                 'variances', variance, 'flip', self.flip, 'clip', self.clip, 'step_w',
                 steps[0], 'step_h', steps[1], 'offset', self.offset,
                 'min_max_aspect_ratios_order', self.min_max_aspect_ratios_order)
        if max_sizes is not None and len(max_sizes) > 0:
            attrs += ('max_sizes', max_sizes)
        box, var = C_ops.prior_box(input, image, *attrs)
        return box, var

def get_ssd_pd_bboxes(outputs, batch_size, params, start_idx):
    """Decode SSD head outputs against generated prior boxes and run NMS.

    Args:
        outputs: first half holds per-level box deltas, second half the
            per-level class logits (split at len(outputs)//2 below).
        batch_size: number of images. NOTE(review): `outputs` is not indexed
            by the loop variable — presumably batch_size == 1; confirm.
        params: model config with 'input_size', 'feat_size', 'aspect_ratios'
            ('#'-separated per level), 'steps', 'min_ratio', 'min_sizes',
            'max_sizes' (the value "-1" means "unset") and optionally
            'min_max_aspect_ratios_order'.
        start_idx: global image index offset, prepended to every detection.

    Returns:
        list of per-image arrays; each row is presumably
        [image_index, label + 1, score, x1, y1, x2, y2] — the +1 label
        shift suggests a background class at index 0; confirm downstream.
    """
    channel, height, width = params["input_size"].split(",")
    feat_size = [int(val) for val in params["feat_size"].split(",")]
    aspect_ratios = [[float(ratio) for ratio in ratios.split(",") ] for ratios in params["aspect_ratios"].split("#")]
    steps = [int(val) for val in params["steps"].split(",")]
    min_ratio = int(params["min_ratio"])
    if "#" in params["min_sizes"]:
        # Per-level lists of sizes; "-1" entries become empty placeholders.
        min_sizes = [[ [] if size=="-1" else float(size) for size in sizes.split(",") ] for sizes in params["min_sizes"].split("#")]
        max_sizes = [[ [] if size=="-1" else float(size) for size in sizes.split(",") ] for sizes in params["max_sizes"].split("#")]
    else:
        min_sizes = [ [] if size=="-1" else float(size) for size in params["min_sizes"].split(",") ] 
        max_sizes = [ [] if size=="-1" else float(size) for size in params["max_sizes"].split(",") ]
    min_max_aspect_ratios_order = True if "min_max_aspect_ratios_order" in params.keys() else False
    ssd_bbox =  AnchorGeneratorSSD(aspect_ratios=aspect_ratios,steps=steps,min_ratio=min_ratio,
                    min_sizes=min_sizes, max_sizes=max_sizes,min_max_aspect_ratios_order=min_max_aspect_ratios_order)
    
    npreds = []
    for idx in range(batch_size):

        # Dummy feature maps / image: prior_box only reads their shapes.
        inputs_fake = [paddle.to_tensor(np.ones((1,1,int(val),int(val)),dtype=np.float32)) for val in feat_size]
        image_fake = paddle.to_tensor(np.ones((1,1,int(height),int(width)),dtype=np.float32))
        prior_boxes = ssd_bbox(inputs_fake, image_fake)
        boxes = [paddle.to_tensor(outputs[i]) for i in range(len(outputs)//2)]
        scores = [paddle.to_tensor(outputs[i+len(outputs)//2]) for i in range(len(outputs)//2)]
        boxes = paddle.concat(boxes, axis=1)
        prior_boxes = paddle.concat(prior_boxes)

        # Prior boxes are xyxy; convert to center/size form for decoding.
        pb_w = prior_boxes[:, 2] - prior_boxes[:, 0] 
        pb_h = prior_boxes[:, 3] - prior_boxes[:, 1]
        pb_x = prior_boxes[:, 0] + pb_w * 0.5
        pb_y = prior_boxes[:, 1] + pb_h * 0.5
        # Standard SSD delta decoding; 0.1/0.2 match the prior-box variances
        # used in AnchorGeneratorSSD.prior_box.
        out_x = pb_x + boxes[:, :, 0] * pb_w * 0.1
        out_y = pb_y + boxes[:, :, 1] * pb_h * 0.1
        out_w = paddle.exp(boxes[:, :, 2] * 0.2) * pb_w
        out_h = paddle.exp(boxes[:, :, 3] * 0.2) * pb_h
        output_boxes = paddle.stack(
            [
                out_x - out_w / 2., out_y - out_h / 2., out_x + out_w / 2.,
                out_y + out_h / 2.
            ],
            axis=-1)

        # Softmax over classes, then classes-first layout for NMS.
        output_scores = F.softmax(paddle.concat(
            scores, axis=1)).transpose([0, 2, 1])
        
        # preds = pd_multiclass_nms(output_boxes, output_scores, 0.01, 0.3, 750, 20, 3000)
        preds = pd_multiclass_nms(output_boxes, output_scores, 0.01, 0.45, 200, 20, 400)
        preds = preds.numpy()
        if len(preds) != 0:
            # Prepend the global image index and shift the label column by
            # one (presumably to account for a background class; confirm).
            indexes = np.ones((len(preds), 1), dtype=np.float32) * (start_idx+idx)
            preds = np.concatenate((indexes, preds), axis=-1)
            preds[:, 1] = preds[:, 1] + 1
        npreds.append(preds)        
        
    return npreds

def get_yolov7_pd_bboxes_v1(outputs, batch_size, params, contents):
    """Run NMS on decoded YOLOv7 outputs (boxes in columns 0:4, per-class
    scores in the remaining columns).

    Returned box coordinates are normalized by the network input size.
    NOTE(review): only outputs[0] is used and it is the same for every
    image — presumably batch_size == 1; confirm upstream.
    """
    _, height, width = params["input_size"].split(",")
    norm = (1., 1., int(width), int(height), int(width), int(height))

    results = []
    for _ in range(batch_size):
        head = outputs[0]
        bboxes = head[..., :4]
        scores = head[..., 4:]
        dets = pd_multiclass_nms(paddle.to_tensor(bboxes),
                                 paddle.to_tensor(scores).transpose([0, 2, 1]),
                                 0.001, 0.7, 300, -1, 10000)
        results.append(dets.numpy() / norm)
    return results

def get_yolov7_pd_bboxes(outputs, batch_size, params, contents):
    """Decode raw YOLOv7 anchor-based head outputs and run multiclass NMS.

    Args:
        outputs: list of raw per-level head tensors (numpy), each reshaped
            to (1, 3 anchors, 5 + classes, ny, nx) below.
        batch_size: number of images. NOTE(review): `outputs` is not indexed
            by `idx` — presumably batch_size == 1; confirm upstream.
        params: model config providing 'input_size', 'anchors' (two values
            per anchor, three anchors per level) and 'outputs_size'
            ('#'-separated "c,ny,nx" triplets, one per level).
        contents: unused here.

    Returns:
        list of per-image prediction arrays with box coordinates normalized
        by the network input width/height.
    """
    channel, height, width = params["input_size"].split(",")

    # Shape (levels, 3, 1, 1, 2) so anchors broadcast over the ny/nx grid.
    anchors_list = np.array([float(anch) for anch in params['anchors'].split(",")]).reshape(-1,3,1,1,2).astype('float32')
    outputs_size = params['outputs_size'].split("#")
    outputs_size_list = [ [int(size) for size in output_size.split(",")] for output_size in outputs_size]

    strides = [8, 16, 32]
    npreds = []
    for idx in range(batch_size):
        bboxes_list, scores_list = [], []
        for i in range(len(outputs)):
            _, ny, nx = outputs_size_list[i][0], outputs_size_list[i][1], outputs_size_list[i][2]
            output = outputs[i].reshape(1, 3, -1, ny, nx)
            # (1, anchors, ny, nx, 5 + classes) after the sigmoid activation.
            output = np.transpose(sigmoid(output), (0, 1, 3, 4, 2))

            anchors = anchors_list[i]

            bbox_confs = output[..., 4:5]
            bbox_cls = output[..., 5:]

            # Cell-offset grid of shape (1, 1, ny, nx, 2) where [..., 0] is
            # the x (column) offset and [..., 1] the y (row) offset.
            # BUGFIX: the original used np.meshgrid's default 'xy' indexing
            # and then reshaped (nx, ny, 2) -> (ny, nx, 2), which is only
            # correct when ny == nx. Explicit 'ij' indexing produces the
            # identical grid for square maps and a correct one for
            # rectangular feature maps.
            yv, xv = np.meshgrid(np.arange(ny), np.arange(nx), indexing='ij')
            grid = np.stack((xv, yv), axis=2).reshape(1, 1, ny, nx, 2).astype('float32')
            # Standard YOLOv5/v7 decoding: xy = (sigmoid*2 - 0.5 + cell) *
            # stride, wh = (sigmoid*2)^2 * anchor.
            bbox_xy = (output[..., :2] * 2 - 0.5 + grid) * strides[i]
            bbox_wh = ((output[..., 2:4] * 2) ** 2) * anchors

            bbox_x1y1 = bbox_xy - bbox_wh / 2.
            bbox_x2y2 = bbox_xy + bbox_wh / 2.

            bboxes = np.concatenate((bbox_x1y1, bbox_x2y2), axis=-1)
            # Objectness-weighted class probabilities.
            scores = bbox_confs * bbox_cls

            bboxes_list.append(bboxes.reshape(1, -1, 4))
            scores_list.append(scores.reshape(1, -1, 80))  # 80 classes assumed

        pred_scores = np.concatenate(scores_list, axis=1)
        pred_bboxes = np.concatenate(bboxes_list, axis=1)

        preds = pd_multiclass_nms(paddle.to_tensor(pred_bboxes), paddle.to_tensor(pred_scores).transpose([0,2,1]), 0.001, 0.7, 300, -1, 1000)
        preds = preds.numpy() / (1., 1., int(width), int(height), int(width), int(height))
        npreds.append(preds)
    return npreds


def get_yolov8_pd_bboxes(outputs, batch_size, params, contents):
    """Decode YOLOv8 distance predictions into boxes and run multiclass NMS.

    Returned box coordinates are normalized by the network input size.
    NOTE(review): `outputs` is the same for every image — presumably
    batch_size == 1; confirm upstream.
    """
    _, height, width = params["input_size"].split(",")
    norm = (1., 1., int(width), int(height), int(width), int(height))

    results = []
    for _ in range(batch_size):
        pred_scores, pred_dist, anchor_points, stride_tensor = outputs
        boxes = distance2bbox(anchor_points[None], pred_dist)
        boxes = boxes * stride_tensor

        dets = pd_multiclass_nms(paddle.to_tensor(boxes),
                                 paddle.to_tensor(pred_scores),
                                 0.001, 0.7, 300, -1, 1000)
        results.append(dets.numpy() / norm)
    return results